using Pkg
Pkg.activate(".")

using DataFrames, CSV, DataFramesMeta, StatsPlots, Downloads, ProgressMeter, Weave, 
    Loess, Arrow, Transducers, Memoization, ReverseDiff, Zygote, Turing, LinearAlgebra, ColorSchemes,
    Serialization

Getting Started With Julia for Data Analysis

In this project, we will download some files from the CA dept of Education which give averaged results for test scores in all schools in CA across various student groups within the schoool.

The data is available at: https://caaspp-elpac.cde.ca.gov/caaspp/ResearchFileListSB?ps=true&lstTestYear=2021&lstTestType=B&lstCounty=00&lstDistrict=00000#dl

We are going to get the Los Angeles county files which avoids unnecessary data.

Unfortunately some years have different "versions" and there is no way on the site to get a list of files, so we need to just by hand click through and find out the file names, for example:

https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/sbca2015all19csv_v3.zip for 2014-2015 LA County

https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/sbca2021all19csv_v2.zip for 2020-2021 LA County data...

there is no 2019-2020 data as COVID prevented the testing from happening.

versions = [2015 => 3, 2016 => 3, 2017 => 2, 2018 => 3, 2019 => 4 , 2021 => 2]


files = ["sb_ca$(vv[1])_all_19_csv_v$(vv[2]).zip" for vv in versions]
6-element Vector{String}:
 "sb_ca2015_all_19_csv_v3.zip"
 "sb_ca2016_all_19_csv_v3.zip"
 "sb_ca2017_all_19_csv_v2.zip"
 "sb_ca2018_all_19_csv_v3.zip"
 "sb_ca2019_all_19_csv_v4.zip"
 "sb_ca2021_all_19_csv_v2.zip"

We also need the entity file that describes the codes for each school/district etc and the codes for the tests and student group descriptions we'll assume for our analysis that none of the entities of interest changed during the time period and just get the recent ones.

push!(files,"sb_ca2021entities_csv.zip")
push!(files,"Tests.zip")
push!(files,"StudentGroups.zip")
9-element Vector{String}:
 "sb_ca2015_all_19_csv_v3.zip"
 "sb_ca2016_all_19_csv_v3.zip"
 "sb_ca2017_all_19_csv_v2.zip"
 "sb_ca2018_all_19_csv_v3.zip"
 "sb_ca2019_all_19_csv_v4.zip"
 "sb_ca2021_all_19_csv_v2.zip"
 "sb_ca2021entities_csv.zip"
 "Tests.zip"
 "StudentGroups.zip"

Downloading the files

We'll change directory into the data dir, download each of the files, unzip them, and then cd out of the directory.

We use try/finally to ensure that we wind up in the same directory even if there's an error, then read the directory to see what files we have...

try 
    cd("data")
    @showprogress 3 for f in files
        if ! ispath(f) # if we don't have the file already
            Downloads.download("https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/$(f)",f)
        end
        run(`unzip -u $(f)`)
    end
finally
    cd("..")
end

readdir("data")
Archive:  sb_ca2015_all_19_csv_v3.zip
Archive:  sb_ca2016_all_19_csv_v3.zip
Archive:  sb_ca2017_all_19_csv_v2.zip
Archive:  sb_ca2018_all_19_csv_v3.zip
Archive:  sb_ca2019_all_19_csv_v4.zip
Archive:  sb_ca2021_all_19_csv_v2.zip
Archive:  sb_ca2021entities_csv.zip
Archive:  Tests.zip
Archive:  StudentGroups.zip
19-element Vector{String}:
 "StudentGroups.txt"
 "StudentGroups.zip"
 "Tests.txt"
 "Tests.zip"
 "sb_ca2015_all_19_csv_v3.txt"
 "sb_ca2015_all_19_csv_v3.zip"
 "sb_ca2015_all_ascii_v3.zip"
 "sb_ca2016_all_19_csv_v3.txt"
 "sb_ca2016_all_19_csv_v3.zip"
 "sb_ca2017_all_19_csv_v2.txt"
 "sb_ca2017_all_19_csv_v2.zip"
 "sb_ca2018_all_19_csv_v3.txt"
 "sb_ca2018_all_19_csv_v3.zip"
 "sb_ca2019_all_19_csv_v4.txt"
 "sb_ca2019_all_19_csv_v4.zip"
 "sb_ca2021_all_19_csv_v2.txt"
 "sb_ca2021_all_19_csv_v2.zip"
 "sb_ca2021entities_csv.txt"
 "sb_ca2021entities_csv.zip"

We have all the files

Next, we need to read the data in, and try to subset down to the schools of interest. Unfortunately, the files don't all have the same columns, nor do they have the same delimeters! What do we do about that? DataFrames allows us to append!(df,df2) and handle the case where they don't have exactly the same columns by various methods by specifying the "cols" option (see docs) cols=:union causes the resulting dataset to have all the columns from both files, but with missing values where needed

testscores = DataFrame()
entities = DataFrame()
tests = DataFrame()
students = DataFrame()

const cachedir = "/var/cache/userdata/dlakelan/juliacache"

if ispath(joinpath(cachedir,"tests.arrow"))
    global tests = DataFrame(Arrow.Table(joinpath(cachedir,"tests.arrow")))
    global students = DataFrame(Arrow.Table(joinpath(cachedir,"students.arrow")))
    global entities = DataFrame(Arrow.Table(joinpath(cachedir,"entities.arrow")))

    # the dataframe for test scores is huge, 3.8 million rows and a lot of columns, we only care about these districts.
    # we run the rows of the Arrow.Table through a Filter transducer that filters on District_Code and collect the ones we care about, saving gigabytes
    # of RAM
    ourentities = @subset(entities,in.(:District_Name, Ref(["Pasadena Unified","Glendale Unified", "Alhambra Unified"])))
    @show ourentities
    let ourdistcodes = unique(ourentities.District_Code);
        global testscores = Tables.rows(Arrow.Table(joinpath(cachedir,"testscores.arrow"))) |> Filter(x -> !ismissing(x.District_Code) && x.District_Code in ourdistcodes && x.School_Code .!= 0) |> DataFrame
    end
else
    for f in filter(endswith(".txt"),readdir("data"))
        @show f
        if occursin("entities",f)
            global entities = CSV.read(joinpath("data",f),DataFrame; normalizenames=true, delim="^")
        elseif occursin("Student",f)
            global students = CSV.read(joinpath("data",f),DataFrame; normalizenames=true,delim="^")
        elseif occursin("Tests",f)
            global tests = CSV.read(joinpath("data",f),DataFrame; normalizenames=true,delim="^")
        else
            df = CSV.read(joinpath("data",f),DataFrame; normalizenames=true) # uses a standard "," delimiter
            append!(testscores,df; cols=:union) # we will collect all the various columns with missing data where they don't exist
        end
    end
    Arrow.write(joinpath(cachedir,"tests.arrow"),tests)
    Arrow.write(joinpath(cachedir,"students.arrow"),students)
    Arrow.write(joinpath(cachedir,"entities.arrow"),entities)
    Arrow.write(joinpath(cachedir,"testscores.arrow"),testscores)
end
ourentities = 78×10 DataFrame
 Row │ County_Code  District_Code  School_Code  Filler   Test_Year  Type_ID
  County_Name  District_Name     School_Name                        Zip_Cod
e
     │ Int64        Int64          Int64        Missing  Int64      Int64  
  String       String?           String?                            Int64?
─────┼─────────────────────────────────────────────────────────────────────
───────────────────────────────────────────────────────────────────────────
──
   1 │          19          64568            0  missing       2021        6
  Los Angeles  Glendale Unified  missing                             missin
g
   2 │          19          64568      1932144  missing       2021        7
  Los Angeles  Glendale Unified  Crescenta Valley High                 9121
4
   3 │          19          64568      1933472  missing       2021        7
  Los Angeles  Glendale Unified  Daily (Allan F.) High (Continuat…     9120
6
   4 │          19          64568      1933530  missing       2021        7
  Los Angeles  Glendale Unified  Glendale High                         9120
5
   5 │          19          64568      1934082  missing       2021        7
  Los Angeles  Glendale Unified  Herbert Hoover High                   9120
2
   6 │          19          64568      1964568  missing       2021        7
  Los Angeles  Glendale Unified  Glendale Unified District Level …     9120
6
   7 │          19          64568      1995497  missing       2021        7
  Los Angeles  Glendale Unified  Verdugo Academy                       9121
4
   8 │          19          64568      1996131  missing       2021        7
  Los Angeles  Glendale Unified  Anderson W. Clark Magnet High         9121
4
   9 │          19          64568      1996156  missing       2021        7
  Los Angeles  Glendale Unified  Jewel City Community Day              9120
4
  10 │          19          64568      6013619  missing       2021        7
  Los Angeles  Glendale Unified  Balboa Elementary                     9120
1
  11 │          19          64568      6013627  missing       2021        7
  Los Angeles  Glendale Unified  Cerritos Elementary                   9120
5
  12 │          19          64568      6013635  missing       2021        7
  Los Angeles  Glendale Unified  Columbus Elementary                   9120
3
  13 │          19          64568      6013643  missing       2021        7
  Los Angeles  Glendale Unified  Dunsmore Elementary                   9121
4
  14 │          19          64568      6013650  missing       2021        7
  Los Angeles  Glendale Unified  Thomas Edison Elementary              9120
4
  15 │          19          64568      6013676  missing       2021        7
  Los Angeles  Glendale Unified  Benjamin Franklin Elementary          9120
1
  16 │          19          64568      6013684  missing       2021        7
  Los Angeles  Glendale Unified  John C. Fremont Elementary            9120
8
  17 │          19          64568      6013692  missing       2021        7
  Los Angeles  Glendale Unified  Glenoaks Elementary                   9120
6
  18 │          19          64568      6013718  missing       2021        7
  Los Angeles  Glendale Unified  Thomas Jefferson Elementary           9120
1
  19 │          19          64568      6013726  missing       2021        7
  Los Angeles  Glendale Unified  Mark Keppel Elementary                9120
2
  20 │          19          64568      6013734  missing       2021        7
  Los Angeles  Glendale Unified  La Crescenta Elementary               9121
4
  21 │          19          64568      6013742  missing       2021        7
  Los Angeles  Glendale Unified  Abraham Lincoln Elementary            9121
4
  22 │          19          64568      6013767  missing       2021        7
  Los Angeles  Glendale Unified  Horace Mann Elementary                9120
5
  23 │          19          64568      6013775  missing       2021        7
  Los Angeles  Glendale Unified  John Marshall Elementary              9120
5
  24 │          19          64568      6013783  missing       2021        7
  Los Angeles  Glendale Unified  Monte Vista Elementary                9121
4
  25 │          19          64568      6013809  missing       2021        7
  Los Angeles  Glendale Unified  Mountain Avenue Elementary            9121
4
  26 │          19          64568      6013817  missing       2021        7
  Los Angeles  Glendale Unified  John Muir Elementary                  9120
5
  27 │          19          64568      6013825  missing       2021        7
  Los Angeles  Glendale Unified  Valley View Elementary                9121
4
  28 │          19          64568      6013833  missing       2021        7
  Los Angeles  Glendale Unified  Verdugo Woodlands Elementary          9120
8
  29 │          19          64568      6013841  missing       2021        7
  Los Angeles  Glendale Unified  R. D. White Elementary                9120
6
  30 │          19          64568      6057715  missing       2021        7
  Los Angeles  Glendale Unified  Theodore Roosevelt Middle             9120
6
  31 │          19          64568      6057723  missing       2021        7
  Los Angeles  Glendale Unified  Eleanor J. Toll Middle                9120
2
  32 │          19          64568      6061303  missing       2021        7
  Los Angeles  Glendale Unified  Rosemont Middle                       9121
4
  33 │          19          64568      6061311  missing       2021        7
  Los Angeles  Glendale Unified  Woodrow Wilson Middle                 9120
6
  34 │          19          64568      6099022  missing       2021        7
  Los Angeles  Glendale Unified  College View                          9120
7
  35 │          19          64881            0  missing       2021        6
  Los Angeles  Pasadena Unified  missing                             missin
g
  36 │          19          64881       117440  missing       2021        7
  Los Angeles  Pasadena Unified  CIS Academy                           9110
7
  37 │          19          64881       127746  missing       2021        7
  Los Angeles  Pasadena Unified  Sierra Madre Middle                   9102
4
  38 │          19          64881      1931062  missing       2021        7
  Los Angeles  Pasadena Unified  Blair High                            9110
6
  39 │          19          64881      1931674  missing       2021        7
  Los Angeles  Pasadena Unified  Marshall Fundamental                  9110
4
  40 │          19          64881      1932409  missing       2021        7
  Los Angeles  Pasadena Unified  Norma Coombs Elementary               9110
7
  41 │          19          64881      1936103  missing       2021        7
  Los Angeles  Pasadena Unified  John Muir High                        9110
3
  42 │          19          64881      1936806  missing       2021        7
  Los Angeles  Pasadena Unified  Rose City High (Continuation)         9110
1
  43 │          19          64881      1936822  missing       2021        7
  Los Angeles  Pasadena Unified  Pasadena High                         9110
7
  44 │          19          64881      1964881  missing       2021        7
  Los Angeles  Pasadena Unified  Pasadena Unified District Level …     9110
1
  45 │          19          64881      6021505  missing       2021        7
  Los Angeles  Pasadena Unified  Altadena Elementary                   9100
1
  46 │          19          64881      6021554  missing       2021        7
  Los Angeles  Pasadena Unified  Don Benito Fundamental                9110
7
  47 │          19          64881      6021570  missing       2021        7
  Los Angeles  Pasadena Unified  Field (Eugene) Elementary             9110
7
  48 │          19          64881      6021612  missing       2021        7
  Los Angeles  Pasadena Unified  Hamilton Elementary                   9110
7
  49 │          19          64881      6021620  missing       2021        7
  Los Angeles  Pasadena Unified  Jackson Elementary                    9100
1
  50 │          19          64881      6021679  missing       2021        7
  Los Angeles  Pasadena Unified  Longfellow (Henry W.) Elementary      9110
4
  51 │          19          64881      6021687  missing       2021        7
  Los Angeles  Pasadena Unified  Madison Elementary                    9110
4
  52 │          19          64881      6021729  missing       2021        7
  Los Angeles  Pasadena Unified  San Rafael Elementary                 9110
5
  53 │          19          64881      6021737  missing       2021        7
  Los Angeles  Pasadena Unified  Sierra Madre Elementary               9102
4
  54 │          19          64881      6021752  missing       2021        7
  Los Angeles  Pasadena Unified  Washington Middle                     9110
3
  55 │          19          64881      6021760  missing       2021        7
  Los Angeles  Pasadena Unified  Webster Elementary                    9110
4
  56 │          19          64881      6021778  missing       2021        7
  Los Angeles  Pasadena Unified  Willard Elementary                    9110
7
  57 │          19          64881      6058465  missing       2021        7
  Los Angeles  Pasadena Unified  Charles W. Eliot Middle               9100
1
  58 │          19          64881      6119549  missing       2021        7
  Los Angeles  Pasadena Unified  Washington Elementary                 9110
3
  59 │          19          64881      6120265  missing       2021        7
  Los Angeles  Pasadena Unified  McKinley                              9110
1
  60 │          19          75713            0  missing       2021        6
  Los Angeles  Alhambra Unified  missing                             missin
g
  61 │          19          75713      1930163  missing       2021        7
  Los Angeles  Alhambra Unified  Alhambra High                         9180
1
  62 │          19          75713      1934553  missing       2021        7
  Los Angeles  Alhambra Unified  Mark Keppel High                      9180
1
  63 │          19          75713      1937697  missing       2021        7
  Los Angeles  Alhambra Unified  San Gabriel High                      9177
6
  64 │          19          75713      1975713  missing       2021        7
  Los Angeles  Alhambra Unified  Alhambra Unified District Level …     9180
3
  65 │          19          75713      1995430  missing       2021        7
  Los Angeles  Alhambra Unified  Independence High (Alternative)       9180
1
  66 │          19          75713      6011001  missing       2021        7
  Los Angeles  Alhambra Unified  Brightwood Elementary                 9175
4
  67 │          19          75713      6011019  missing       2021        7
  Los Angeles  Alhambra Unified  Emery Park Elementary                 9180
3
  68 │          19          75713      6011027  missing       2021        7
  Los Angeles  Alhambra Unified  Fremont Elementary                    9180
3
  69 │          19          75713      6011035  missing       2021        7
  Los Angeles  Alhambra Unified  Garfield Elementary                   9180
1
  70 │          19          75713      6011043  missing       2021        7
  Los Angeles  Alhambra Unified  Granada Elementary                    9180
1
  71 │          19          75713      6011050  missing       2021        7
  Los Angeles  Alhambra Unified  Marguerita Elementary                 9180
3
  72 │          19          75713      6011068  missing       2021        7
  Los Angeles  Alhambra Unified  Martha Baldwin Elementary             9180
1
  73 │          19          75713      6011076  missing       2021        7
  Los Angeles  Alhambra Unified  Monterey Highlands Elementary         9175
4
  74 │          19          75713      6011084  missing       2021        7
  Los Angeles  Alhambra Unified  Park Elementary                       9180
1
  75 │          19          75713      6011092  missing       2021        7
  Los Angeles  Alhambra Unified  Ramona Elementary                     9180
3
  76 │          19          75713      6011100  missing       2021        7
  Los Angeles  Alhambra Unified  Repetto Elementary                    9175
4
  77 │          19          75713      6011118  missing       2021        7
  Los Angeles  Alhambra Unified  William Northrup Elementary           9180
1
  78 │          19          75713      6011126  missing       2021        7
  Los Angeles  Alhambra Unified  Ynez Elementary                       9175
4
133505×39 DataFrame
133480 rows omitted
RowCounty_CodeDistrict_CodeSchool_CodeFillerTest_YearSubgroup_IDTest_TypeTotal_CAASPP_EnrollmentTotal_Tested_At_Entity_LevelTotal_Tested_at_Subgroup_LevelGradeTest_IdCAASPP_Reported_EnrollmentStudents_TestedMean_Scale_ScorePercentage_Standard_ExceededPercentage_Standard_MetPercentage_Standard_Met_and_AbovePercentage_Standard_Nearly_MetPercentage_Standard_Not_MetStudents_with_ScoresArea_1_Percentage_Above_StandardArea_1_Percentage_At_or_Near_StandardArea_1_Percentage_Below_StandardArea_2_Percentage_Above_StandardArea_2_Percentage_At_or_Near_StandardArea_2_Percentage_Below_StandardArea_3_Percentage_Above_StandardArea_3_Percentage_At_or_Near_StandardArea_3_Percentage_Below_StandardArea_4_Percentage_Above_StandardArea_4_Percentage_At_or_Near_StandardArea_4_Percentage_Below_StandardTotal_Tested_with_ScoresArea_1_Percentage_Near_StandardArea_2_Percentage_Near_StandardArea_3_Percentage_Near_StandardArea_4_Percentage_Near_StandardCounty_Code_District_Code_School_Code_Filler_Test_Year_Student_Group_ID_Test_Type_Total_Tested_at_Reporting_Level_Total_Tested_with_Scores_at_Reporting_Level_Grade_Test_ID_Students_Enrolled_Students_Tested_Mean_Scale_Score_Percentage_Standard_Exceeded_Percentage_Standard_Met_Percentage_Standard_Met_and_Above_Percentage_Standard_Nearly_Met_Percentage_Standard_Not_Met_Students_with_Scores_Area_1_Percentage_Above_Standard_Area_1_Percentage_Near_Standard_Area_1_Percentage_Below_Standard_Area_2_Percentage_Above_Standard_Area_2_Percentage_Near_Standard_Area_2_Percentage_Below_Standard_Area_3_Percentage_Above_Standard_Area_3_Percentage_Near_Standard_Area_3_Percentage_Below_Standard_Area_4_Percentage_Above_Standard_Area_4_Percentage_Near_Standard_Area_4_Percentage_Below_Standard_Type_ID
Int64Int64Int64MissingInt64Int64StringInt64?AnyInt64?Int64Int64AnyAnyString?StringStringStringStringStringAnyStringString?StringStringString?StringStringString?StringStringString?StringAnyString?String?String?String?Missing
11964568127415missing20151B3226110******0************missingmissingmissingmissingmissingmissing
21964568127415missing20151B3226210******0************missingmissingmissingmissingmissingmissing
31964568127415missing20151B3228211******0************missingmissingmissingmissingmissingmissing
41964568127415missing20151B3228111******0************missingmissingmissingmissingmissingmissing
51964568127415missing20151B32211111******0************missingmissingmissingmissingmissingmissing
61964568127415missing20151B32211210******0************missingmissingmissingmissingmissingmissing
71964568127415missing20151B32213231missing*****0************missingmissingmissingmissingmissingmissing
81964568127415missing20151B32213132missing*****0************missingmissingmissingmissingmissingmissing
91964568127415missing20153B3226110******0************missingmissingmissingmissingmissingmissing
101964568127415missing20153B3226210******0************missingmissingmissingmissingmissingmissing
111964568127415missing20153B3228111******0************missingmissingmissingmissingmissingmissing
121964568127415missing20153B3228211******0************missingmissingmissingmissingmissingmissing
131964568127415missing20153B32211210******0************missingmissingmissingmissingmissingmissing
13349419757136011126missing2019190Bmissing*missing132**missing*******missing**missing**missing**missing******missing
13349519757136011126missing2019202Bmissing314missing132314314missing42.9924.8467.8315.9216.2431452.23missing19.4339.49missing19.4338.85missing15.290.00missing0.0031428.3441.0845.860.00missing
13349619757136011126missing2019203Bmissing4missing13244missing*****4*missing**missing**missing**missing*4****missing
13349719757136011126missing2019204Bmissing87missing1328787missing8.059.2017.2427.5955.178710.34missing64.3710.34missing54.029.20missing50.570.00missing0.008725.2935.6340.230.00missing
13349819757136011126missing2019205Bmissing*missing132**missing*******missing**missing**missing**missing******missing
13349919757136011126missing2019206Bmissing4missing13244missing*****4*missing**missing**missing**missing*4****missing
13350019757136011126missing2019207Bmissing4missing13244missing*****4*missing**missing**missing**missing*4****missing
13350119757136011126missing2019222Bmissing110missing132111110missing57.2723.6480.9113.645.4511068.18missing8.1853.64missing10.0054.55missing6.360.00missing0.0011023.6436.3639.090.00missing
13350219757136011126missing2019223Bmissing5missing13255missing*****5*missing**missing**missing**missing*5****missing
13350319757136011126missing2019224Bmissing29missing1323029missing17.2420.6937.9317.2444.832913.79missing51.7220.69missing31.0320.69missing37.930.00missing0.002934.4848.2841.380.00missing
13350419757136011126missing2019226Bmissing*missing132**missing*******missing**missing**missing**missing******missing
13350519757136011126missing2019227Bmissing*missing132**missing*******missing**missing**missing**missing******missing

Some columns got parsed as strings rather than numbers. We need to "tryparse" these as Float64, if that returns nothing, we'll replace with missing, if the

function floatormiss(x)
    if ismissing(x)
        return missing
    end
    let v = tryparse(Float64,x);
        if isnothing(v)
            missing
        else
            v
        end
    end
end


testscores.Mean_Scale_Score = floatormiss.(testscores.Mean_Scale_Score)
133505-element Vector{Union{Missing, Float64}}:
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 ⋮
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
 missing
pusdentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Pasadena Unified" .&& .! ismissing.(:School_Name))
alhambraentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Alhambra Unified" .&& .! ismissing.(:School_Name))
glendaleentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Glendale Unified" .&& .! ismissing.(:School_Name))

pusdtests = leftjoin(@select(pusdentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal)
alhambratests = leftjoin(@select(alhambraentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal)
glendaletests = leftjoin(@select(glendaleentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal)
47370×41 DataFrame
47345 rows omitted
RowSchool_CodeSchool_NameDistrict_NameCounty_CodeDistrict_CodeFillerTest_YearSubgroup_IDTest_TypeTotal_CAASPP_EnrollmentTotal_Tested_At_Entity_LevelTotal_Tested_at_Subgroup_LevelGradeTest_IdCAASPP_Reported_EnrollmentStudents_TestedMean_Scale_ScorePercentage_Standard_ExceededPercentage_Standard_MetPercentage_Standard_Met_and_AbovePercentage_Standard_Nearly_MetPercentage_Standard_Not_MetStudents_with_ScoresArea_1_Percentage_Above_StandardArea_1_Percentage_At_or_Near_StandardArea_1_Percentage_Below_StandardArea_2_Percentage_Above_StandardArea_2_Percentage_At_or_Near_StandardArea_2_Percentage_Below_StandardArea_3_Percentage_Above_StandardArea_3_Percentage_At_or_Near_StandardArea_3_Percentage_Below_StandardArea_4_Percentage_Above_StandardArea_4_Percentage_At_or_Near_StandardArea_4_Percentage_Below_StandardTotal_Tested_with_ScoresArea_1_Percentage_Near_StandardArea_2_Percentage_Near_StandardArea_3_Percentage_Near_StandardArea_4_Percentage_Near_StandardCounty_Code_District_Code_School_Code_Filler_Test_Year_Student_Group_ID_Test_Type_Total_Tested_at_Reporting_Level_Total_Tested_with_Scores_at_Reporting_Level_Grade_Test_ID_Students_Enrolled_Students_Tested_Mean_Scale_Score_Percentage_Standard_Exceeded_Percentage_Standard_Met_Percentage_Standard_Met_and_Above_Percentage_Standard_Nearly_Met_Percentage_Standard_Not_Met_Students_with_Scores_Area_1_Percentage_Above_Standard_Area_1_Percentage_Near_Standard_Area_1_Percentage_Below_Standard_Area_2_Percentage_Above_Standard_Area_2_Percentage_Near_Standard_Area_2_Percentage_Below_Standard_Area_3_Percentage_Above_Standard_Area_3_Percentage_Near_Standard_Area_3_Percentage_Below_Standard_Area_4_Percentage_Above_Standard_Area_4_Percentage_Near_Standard_Area_4_Percentage_Below_Standard_Type_ID
Int64String?String?Int64?Int64?MissingInt64?Int64?String?Int64?AnyInt64?Int64?Int64?AnyAnyFloat64?String?String?String?String?String?AnyString?String?String?String?String?String?String?String?String?String?String?String?AnyString?String?String?String?Missing
11932144Crescenta Valley HighGlendale Unified1964568missing20151B13561318131881110missing*****0************missingmissingmissingmissingmissingmissing
21932144Crescenta Valley HighGlendale Unified1964568missing20151B13561318131882110missing*****0************missingmissingmissingmissingmissingmissing
31932144Crescenta Valley HighGlendale Unified1964568missing20151B1356131813181126806652656.33329622117664453420374815395010000missingmissingmissingmissingmissingmissing
41932144Crescenta Valley HighGlendale Unified1964568missing20151B1356131813181116806672672.252338412466656377613263063662344missingmissingmissingmissingmissingmissing
51932144Crescenta Valley HighGlendale Unified1964568missing20151B135613181318131691667missing52338412466656377613263063662344missingmissingmissingmissingmissingmissing
61932144Crescenta Valley HighGlendale Unified1964568missing20151B135613181318132691665missing3329622117664453420374815395010000missingmissingmissingmissingmissingmissing
71932144Crescenta Valley HighGlendale Unified1964568missing20153B1356131865781110missing*****0************missingmissingmissingmissingmissingmissing
81932144Crescenta Valley HighGlendale Unified1964568missing20153B1356131865782110missing*****0************missingmissingmissingmissingmissingmissing
91932144Crescenta Valley HighGlendale Unified1964568missing20153B135613186571126803322655.13625612019331443125394715414711000missingmissingmissingmissingmissingmissing
101932144Crescenta Valley HighGlendale Unified1964568missing20153B135613186571116803312658.545358014633152409533892963855396missingmissingmissingmissingmissingmissing
111932144Crescenta Valley HighGlendale Unified1964568missing20153B13561318657131691331missing45358014633152409533892963855396missingmissingmissingmissingmissingmissing
121932144Crescenta Valley HighGlendale Unified1964568missing20153B13561318657132691332missing3625612019331443125394715414711000missingmissingmissingmissingmissingmissing
131932144Crescenta Valley HighGlendale Unified1964568missing20154B1356131866182110missing*****0************missingmissingmissingmissingmissingmissing
473596061311Woodrow Wilson MiddleGlendale Unified1964568missing2019203Bmissing34missing1323434missing41.1832.3573.5311.7614.713455.88missing23.5341.18missing17.6547.06missing8.820.00missing0.003420.5941.1844.120.00missing
473606061311Woodrow Wilson MiddleGlendale Unified1964568missing2019204Bmissing146missing132147146missing13.7015.7529.4522.6047.9514618.49missing53.4216.44missing48.6317.12missing41.780.00missing0.0014628.0834.9341.100.00missing
473616061311Woodrow Wilson MiddleGlendale Unified1964568missing2019205Bmissing*missing132**missing*******missing**missing**missing**missing******missing
473626061311Woodrow Wilson MiddleGlendale Unified1964568missing2019206Bmissing559missing132562559missing21.6519.1440.7924.3334.8855929.16missing42.0421.65missing35.4222.18missing28.980.00missing0.0055928.8042.9348.840.00missing
473636061311Woodrow Wilson MiddleGlendale Unified1964568missing2019207Bmissing5missing13255missing*****5*missing**missing**missing**missing*5****missing
473646061311Woodrow Wilson MiddleGlendale Unified1964568missing2019220Bmissing5missing13255missing*****5*missing**missing**missing**missing*5****missing
473656061311Woodrow Wilson MiddleGlendale Unified1964568missing2019222Bmissing31missing1323131missing48.3925.8174.1919.356.453154.84missing9.6851.61missing6.4545.16missing6.450.00missing0.003135.4841.9448.390.00missing
473666061311Woodrow Wilson MiddleGlendale Unified1964568missing2019223Bmissing45missing1324545missing48.8926.6775.5615.568.894553.33missing13.3351.11missing13.3346.67missing8.890.00missing0.004533.3335.5644.440.00missing
473676061311Woodrow Wilson MiddleGlendale Unified1964568missing2019224Bmissing53missing1325353missing35.8516.9852.8318.8728.305343.40missing32.0832.08missing28.3035.85missing33.960.00missing0.005324.5339.6230.190.00missing
473686061311Woodrow Wilson MiddleGlendale Unified1964568missing2019225Bmissing*missing132**missing*******missing**missing**missing**missing******missing
473696061311Woodrow Wilson MiddleGlendale Unified1964568missing2019226Bmissing306missing132309306missing33.6622.8856.5425.1618.3030638.89missing27.1231.70missing23.8633.01missing17.320.00missing0.0030633.9944.4449.670.00missing
473706061311Woodrow Wilson MiddleGlendale Unified1964568missing2019227Bmissing20missing1322020missing40.0025.0065.0025.0010.002055.00missing15.0035.00missing10.0030.00missing20.000.00missing0.002030.0055.0050.000.00missing

Now, let's just plot points...

We want points for each test... TestID = 1 means english, 2 means Math (for "smarter balanced" assessment). Let's group the tests by cohort, meaning TestYear - (Grade - 3) basically the year they were in 3rd grade which is the first year you take the test.

function plotschools(entities,testscores)

    for (testid,testname) in Iterators.zip([1,2],["English","Math"])
        let pl = []
            for sch in eachrow(entities)
                ourdf = @subset(testscores,:School_Code .== sch.School_Code .&& :Subgroup_ID .== 1 .&& :Grade .< 13 .&& :Test_Id .== testid .&& .!ismissing.(:Mean_Scale_Score))
    #            @show ourdf
                if nrow(ourdf) > 0
                    p = @df ourdf plot(:Grade,:Mean_Scale_Score; xlim=(3,12),ylim = (2250,2750),group=:Test_Year .- (:Grade .- 3),legend=false,size=(250,250), title="$(testname): $(sch.School_Name)",linewidth=3)
                    p = @df ourdf scatter!(:Grade,:Mean_Scale_Score; xlim=(3,12),ylim = (2250,2750),group=:Test_Year .- (:Grade .- 3),legend=false,size=(250,250), title="$(testname): $(sch.School_Name)",markerstrokewidth=0)
                    push!(pl,p)

                end
            end
            display(plot(pl...; size=(1500,1500)))
        end
    end
end

plotschools(pusdentities,@subset(pusdtests,:Subgroup_ID .== 1))
plotschools(alhambraentities,@subset(alhambratests,:Subgroup_ID .== 1))

Clearly there are some differences between schools. Since we are looking at the average across the school some of this will be because the demographic and socioeconomic mix of the students is different. For example the middle schoolers at the relative wealthy community of Sierra Madre Middle School are testing about the same as the high school 11th graders at Blair and Marshall Fundamental.

Also Alhambra schools appear to have higher achievement in general.

A considerable difference in overall average test scores can be attributed to a different mix of students. So let's break down the schools by demographic groups. The DemographicID in the students table describes the StudentGroups. We can break this down by:

"Economically Disadvantaged" vs "Not economically disadvantaged"

Race and ethnicity

Parent Education Level

For now, let's focus on math scores, and we'll iterate over every school, and output a graph that shows the average curve for each parent education level:

function plotedlevel(schools,scores,dist)

    edlevnames = Dict(90 => "No HSD", 91 => "HSD", 92 => "Some College", 93 => "College Grad", 94=>"Grad School")

    for sch in eachrow(schools)
        ourdf = @subset(scores,:Test_Id .== 2 .&& :School_Code .== sch.School_Code .&& in.(:Subgroup_ID ,Ref(90:94)))
        p = @df ourdf scatter(:Grade,:Mean_Scale_Score; xlim=(2.5,12), ylim=(2300,2800),title="$(dist)\nMath $(sch.School_Name)\nBy Parent Ed",label=false,markersize=3,size=(500,500))
        for edlev in 90:94
            subs = @subset(ourdf,:Subgroup_ID .== edlev .&& .!ismissing.(:Grade) .&& .! ismissing.(:Mean_Scale_Score))
            if nrow(subs) < 4 
                continue
            end
            println("There are $(nrow(subs)) observations for $(sch.School_Name)")
            grades = collect(minimum(subs.Grade):maximum(subs.Grade))
            if length(grades) > 2
                l = loess(Float64.(subs.Grade),Float64.(subs.Mean_Scale_Score))
                p = plot!(grades,map(x -> Loess.predict(l,x), Float64.(grades)); label=edlevnames[edlev], linewidth=3)
            end
        end


        display(p)
    end

end


plotedlevel(pusdentities,pusdtests,"PUSD")
plotedlevel(alhambraentities,alhambratests,"Alhambra")
plotedlevel(glendaleentities,glendaletests,"Glendale")
There are 4 observations for CIS Academy
There are 5 observations for CIS Academy
There are 8 observations for Sierra Madre Middle
There are 15 observations for Sierra Madre Middle
There are 15 observations for Sierra Madre Middle
There are 15 observations for Sierra Madre Middle
There are 20 observations for Blair High
There are 20 observations for Blair High
There are 20 observations for Blair High
There are 20 observations for Blair High
There are 17 observations for Blair High
There are 20 observations for Marshall Fundamental
There are 20 observations for Marshall Fundamental
There are 20 observations for Marshall Fundamental
There are 20 observations for Marshall Fundamental
There are 20 observations for Marshall Fundamental
There are 13 observations for Norma Coombs Elementary
There are 11 observations for Norma Coombs Elementary
There are 9 observations for Norma Coombs Elementary
There are 5 observations for Norma Coombs Elementary
There are 5 observations for John Muir High
There are 5 observations for John Muir High
There are 5 observations for John Muir High
There are 4 observations for John Muir High
There are 5 observations for Rose City High (Continuation)
There are 5 observations for Rose City High (Continuation)
There are 5 observations for Pasadena High
There are 5 observations for Pasadena High
There are 5 observations for Pasadena High
There are 5 observations for Pasadena High
There are 5 observations for Pasadena High
There are 6 observations for Altadena Elementary
There are 11 observations for Altadena Elementary
There are 5 observations for Don Benito Fundamental
There are 15 observations for Don Benito Fundamental
There are 15 observations for Don Benito Fundamental
There are 15 observations for Don Benito Fundamental
There are 4 observations for Field (Eugene) Elementary
There are 14 observations for Field (Eugene) Elementary
There are 14 observations for Field (Eugene) Elementary
There are 11 observations for Hamilton Elementary
There are 15 observations for Hamilton Elementary
There are 15 observations for Hamilton Elementary
There are 15 observations for Hamilton Elementary
There are 13 observations for Jackson Elementary
There are 14 observations for Jackson Elementary
There are 5 observations for Jackson Elementary
There are 10 observations for Longfellow (Henry W.) Elementary
There are 14 observations for Longfellow (Henry W.) Elementary
There are 11 observations for Longfellow (Henry W.) Elementary
There are 9 observations for Longfellow (Henry W.) Elementary
There are 15 observations for Madison Elementary
There are 15 observations for Madison Elementary
There are 4 observations for San Rafael Elementary
There are 12 observations for San Rafael Elementary
There are 15 observations for San Rafael Elementary
There are 6 observations for Sierra Madre Elementary
There are 13 observations for Sierra Madre Elementary
There are 15 observations for Sierra Madre Elementary
There are 15 observations for Sierra Madre Elementary
There are 15 observations for Washington Middle
There are 15 observations for Washington Middle
There are 11 observations for Washington Middle
There are 4 observations for Washington Middle
There are 12 observations for Webster Elementary
There are 13 observations for Webster Elementary
There are 12 observations for Webster Elementary
There are 9 observations for Webster Elementary
There are 13 observations for Willard Elementary
There are 15 observations for Willard Elementary
There are 15 observations for Willard Elementary
There are 15 observations for Willard Elementary
There are 7 observations for Willard Elementary
There are 15 observations for Charles W. Eliot Middle
There are 15 observations for Charles W. Eliot Middle
There are 15 observations for Charles W. Eliot Middle
There are 14 observations for Charles W. Eliot Middle
There are 5 observations for Charles W. Eliot Middle
There are 15 observations for Washington Elementary
There are 15 observations for Washington Elementary
There are 27 observations for McKinley
There are 30 observations for McKinley
There are 30 observations for McKinley
There are 30 observations for McKinley
There are 29 observations for McKinley
There are 5 observations for Alhambra High
There are 5 observations for Alhambra High
There are 5 observations for Alhambra High
There are 5 observations for Alhambra High
There are 5 observations for Alhambra High
There are 5 observations for Mark Keppel High
There are 5 observations for Mark Keppel High
There are 5 observations for Mark Keppel High
There are 5 observations for Mark Keppel High
There are 5 observations for Mark Keppel High
There are 5 observations for San Gabriel High
There are 5 observations for San Gabriel High
There are 5 observations for San Gabriel High
There are 5 observations for San Gabriel High
There are 5 observations for San Gabriel High
There are 29 observations for Brightwood Elementary
There are 30 observations for Brightwood Elementary
There are 30 observations for Brightwood Elementary
There are 30 observations for Brightwood Elementary
There are 26 observations for Emery Park Elementary
There are 29 observations for Emery Park Elementary
There are 12 observations for Emery Park Elementary
There are 29 observations for Fremont Elementary
There are 28 observations for Fremont Elementary
There are 14 observations for Fremont Elementary
There are 27 observations for Garfield Elementary
There are 30 observations for Garfield Elementary
There are 27 observations for Garfield Elementary
There are 8 observations for Garfield Elementary
There are 27 observations for Granada Elementary
There are 17 observations for Granada Elementary
There are 26 observations for Granada Elementary
There are 16 observations for Marguerita Elementary
There are 30 observations for Marguerita Elementary
There are 26 observations for Marguerita Elementary
There are 22 observations for Marguerita Elementary
There are 18 observations for Martha Baldwin Elementary
There are 30 observations for Martha Baldwin Elementary
There are 30 observations for Martha Baldwin Elementary
There are 30 observations for Martha Baldwin Elementary
There are 19 observations for Martha Baldwin Elementary
There are 7 observations for Monterey Highlands Elementary
There are 30 observations for Monterey Highlands Elementary
There are 28 observations for Monterey Highlands Elementary
There are 30 observations for Monterey Highlands Elementary
There are 30 observations for Monterey Highlands Elementary
There are 7 observations for Park Elementary
There are 30 observations for Park Elementary
There are 30 observations for Park Elementary
There are 30 observations for Park Elementary
There are 9 observations for Park Elementary
There are 23 observations for Ramona Elementary
There are 30 observations for Ramona Elementary
There are 30 observations for Ramona Elementary
There are 30 observations for Ramona Elementary
There are 11 observations for Ramona Elementary
There are 30 observations for Repetto Elementary
There are 29 observations for Repetto Elementary
There are 30 observations for Repetto Elementary
There are 25 observations for Repetto Elementary
There are 19 observations for William Northrup Elementary
There are 30 observations for William Northrup Elementary
There are 26 observations for William Northrup Elementary
There are 26 observations for William Northrup Elementary
There are 30 observations for Ynez Elementary
There are 30 observations for Ynez Elementary
There are 28 observations for Ynez Elementary
There are 28 observations for Ynez Elementary
There are 5 observations for Crescenta Valley High
There are 5 observations for Crescenta Valley High
There are 5 observations for Crescenta Valley High
There are 5 observations for Crescenta Valley High
There are 5 observations for Daily (Allan F.) High (Continuation)
There are 5 observations for Daily (Allan F.) High (Continuation)
There are 5 observations for Daily (Allan F.) High (Continuation)
There are 5 observations for Glendale High
There are 5 observations for Glendale High
There are 5 observations for Glendale High
There are 5 observations for Glendale High
There are 5 observations for Glendale High
There are 5 observations for Herbert Hoover High
There are 5 observations for Herbert Hoover High
There are 5 observations for Herbert Hoover High
There are 5 observations for Herbert Hoover High
There are 5 observations for Herbert Hoover High
There are 5 observations for Anderson W. Clark Magnet High
There are 5 observations for Anderson W. Clark Magnet High
There are 5 observations for Anderson W. Clark Magnet High
There are 5 observations for Anderson W. Clark Magnet High
There are 20 observations for Balboa Elementary
There are 19 observations for Balboa Elementary
There are 20 observations for Balboa Elementary
There are 17 observations for Balboa Elementary
There are 20 observations for Cerritos Elementary
There are 19 observations for Cerritos Elementary
There are 9 observations for Cerritos Elementary
There are 15 observations for Columbus Elementary
There are 14 observations for Columbus Elementary
There are 15 observations for Columbus Elementary
There are 8 observations for Columbus Elementary
There are 20 observations for Dunsmore Elementary
There are 20 observations for Dunsmore Elementary
There are 4 observations for Thomas Edison Elementary
There are 20 observations for Thomas Edison Elementary
There are 20 observations for Thomas Edison Elementary
There are 20 observations for Thomas Edison Elementary
There are 17 observations for Thomas Edison Elementary
There are 10 observations for Benjamin Franklin Elementary
There are 17 observations for Benjamin Franklin Elementary
There are 14 observations for Benjamin Franklin Elementary
There are 17 observations for John C. Fremont Elementary
There are 20 observations for John C. Fremont Elementary
There are 20 observations for John C. Fremont Elementary
There are 12 observations for Glenoaks Elementary
There are 20 observations for Glenoaks Elementary
There are 20 observations for Glenoaks Elementary
There are 20 observations for Thomas Jefferson Elementary
There are 10 observations for Thomas Jefferson Elementary
There are 20 observations for Thomas Jefferson Elementary
There are 13 observations for Thomas Jefferson Elementary
There are 15 observations for Mark Keppel Elementary
There are 15 observations for Mark Keppel Elementary
There are 15 observations for Mark Keppel Elementary
There are 15 observations for Mark Keppel Elementary
There are 5 observations for La Crescenta Elementary
There are 20 observations for La Crescenta Elementary
There are 20 observations for La Crescenta Elementary
There are 11 observations for Abraham Lincoln Elementary
There are 20 observations for Abraham Lincoln Elementary
There are 20 observations for Abraham Lincoln Elementary
There are 6 observations for Horace Mann Elementary
There are 15 observations for Horace Mann Elementary
There are 14 observations for Horace Mann Elementary
There are 15 observations for Horace Mann Elementary
There are 5 observations for Horace Mann Elementary
There are 15 observations for John Marshall Elementary
There are 8 observations for John Marshall Elementary
There are 15 observations for John Marshall Elementary
There are 10 observations for John Marshall Elementary
There are 20 observations for Monte Vista Elementary
There are 20 observations for Monte Vista Elementary
There are 5 observations for Mountain Avenue Elementary
There are 20 observations for Mountain Avenue Elementary
There are 20 observations for Mountain Avenue Elementary
There are 20 observations for John Muir Elementary
There are 20 observations for John Muir Elementary
There are 20 observations for John Muir Elementary
There are 16 observations for John Muir Elementary
There are 20 observations for Valley View Elementary
There are 20 observations for Valley View Elementary
There are 18 observations for Verdugo Woodlands Elementary
There are 20 observations for Verdugo Woodlands Elementary
There are 20 observations for Verdugo Woodlands Elementary
There are 15 observations for R. D. White Elementary
There are 14 observations for R. D. White Elementary
There are 15 observations for R. D. White Elementary
There are 15 observations for R. D. White Elementary
There are 14 observations for Theodore Roosevelt Middle
There are 15 observations for Theodore Roosevelt Middle
There are 15 observations for Theodore Roosevelt Middle
There are 15 observations for Theodore Roosevelt Middle
There are 12 observations for Theodore Roosevelt Middle
There are 10 observations for Eleanor J. Toll Middle
There are 15 observations for Eleanor J. Toll Middle
There are 15 observations for Eleanor J. Toll Middle
There are 15 observations for Eleanor J. Toll Middle
There are 15 observations for Eleanor J. Toll Middle
There are 10 observations for Rosemont Middle
There are 10 observations for Rosemont Middle
There are 10 observations for Rosemont Middle
There are 10 observations for Rosemont Middle
There are 12 observations for Woodrow Wilson Middle
There are 15 observations for Woodrow Wilson Middle
There are 15 observations for Woodrow Wilson Middle
There are 15 observations for Woodrow Wilson Middle
There are 15 observations for Woodrow Wilson Middle
function plotdistrictedlev(tests,name)
    edlevnames = Dict(90 => "No HSD", 91 => "HSD", 92 => "Some College", 93 => "College Grad", 94=>"Grad School")
    p = @df tests scatter(:Grade,:Mean_Scale_Score; xlim=(2.5,12), ylim=(2300,2800), title="$(name)\nMath Scores")
    for edlev in keys(edlevnames)
        subs = @subset(tests, :Subgroup_ID .== edlev .&& .! ismissing.(:Grade) .&& .! ismissing.(:Mean_Scale_Score))
        grades = collect(3:11)
        l = loess(Float64.(subs.Grade), Float64.(subs.Mean_Scale_Score))
        p = plot!(grades,map(x -> Loess.predict(l,x), Float64.(grades)); label = edlevnames[edlev],linewidth=3)
    end
    display(p)
    p
end

p1 = plotdistrictedlev(@subset(pusdtests,:Test_Id .== 2),"PUSD")
p2 = plotdistrictedlev(@subset(alhambratests,:Test_Id .== 2),"Alhambra")
p3 = plotdistrictedlev(@subset(glendaletests,:Test_Id .== 2),"Glendale")

plot(p1,p2,p3; layout=(1,3),legend=:topleft,size=(2000,500))

Let's summarize the result via a model

One reason to create a mathematical model is that it can simplify your understanding of a problem. For example a drag coefficient is a kind of summary of the entire pressure field on the surface of an object... integrated together it results in a certain amount of drag.

In our case we will assume that variation from year to year is low, so that we can pool the years together, and get a summary of how quickly students are learning math, by fitting a line through different groups of grades: (3,4,5) (6,7,8) and (8,11) we can summarize the general trend in each school as just 6 numbers (levels and slopes of each group), rather than the details of potentially hundreds of measurements.

We will assume the level and slope of these three lines depends on parents education level only, since we don't have individual student data with covariates where we could fit a more complicated model.

Using Turing.jl for a Bayesian model

@model function schoolmod(school,parented,grade,meanscore,nschools,nedlev)

    c4avg ~ Normal(2450.0,200.0)
    c7avg ~ Normal(2500.0,200.0)
    c11avg ~ Normal(2550.0,200.0)

    m4avg ~ Normal(0.0,100.0)
    m7avg ~ Normal(0.0,100.0)
    msd ~ Gamma(10.0,20.0/9.0)
    csd ~ Gamma(10.0,20.0/9.0)
    c4 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools))
    c7 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools))
    c11 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools))
    m4 ~ MvNormal(repeat([0.0],nschools),msd^2*I(nschools))
    m7 ~ MvNormal(repeat([0.0],nschools),msd^2*I(nschools))

    edlevmult ~ filldist(Gamma(20.0,1.0/19),nschools)

    cedlev4 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2))
    cedlev7 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2))
    cedlev11 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2))

    medlev4 ~ MvNormal(repeat([0.0],nedlev),Diagonal([20.0,20.0,20.0,20.0,20.0].^2))
    medlev7 ~ MvNormal(repeat([0.0],nedlev),Diagonal([20.0,20.0,20.0,20.0,20.0].^2))

    s ~ Gamma(10.0,50/10.0)


    preds = zeros(eltype(s),length(meanscore))
    for (i,(s,ed,g,sc)) in Iterators.enumerate(Iterators.zip(school,parented,grade,meanscore))
        if ed  == 1 # base case
            med4 = med7 = ced4 = ced7 = ced11 = zero(eltype(cedlev4))
        else
            ced4 = cedlev4[ed] * edlevmult[s]
            ced7 = cedlev7[ed] * edlevmult[s]
            ced11 = cedlev11[ed] * edlevmult[s]
            med4 = medlev4[ed] * edlevmult[s]
            med7 = medlev7[ed] * edlevmult[s]
        end
        if g in (3,4,5)
            preds[i] = (c4avg + c4[s] + ced4) + (m4avg + m4[s] + med4) * (g-4)
        elseif g in (6,7,8)
            preds[i] = (c7avg + c7[s] + ced7) + (m7avg + m7[s] + med7) * (g-7)
        elseif g == 11
            preds[i] = (c11avg + c11[s] + ced11)
        end
    end
    meanscore ~ MvNormal(preds,s^2 * I(length(preds)))
end

edlevtests = @chain begin @subset(testscores,.!ismissing.(:Mean_Scale_Score) .&& :Test_Id .== 2 .&& :School_Code .!= 0 .&& in.(:Subgroup_ID,Ref((90,91,92,93,94))))
    @transform(:Edlev = :Subgroup_ID .- 89)
    @transform(:Score = Float64.(:Mean_Scale_Score))
end

schoolids = map(Pair,edlevtests.District_Code,edlevtests.School_Code)
uniqueids = unique(schoolids)
schoolnames = Dict()
schooldists = Dict()
schoolgrades = Dict()

for (i,k) in Iterators.enumerate(uniqueids)
    schoolnames[i] = "Unknown"
    schoolnames[k] = "Unknown"
    for g in 1:11
        schoolgrades[k => g] = false
    end
    schooldists[k] = "Unknown"
    schooldists[i] = "Unknown"
end

for r in eachrow(entities)
    schoolnames[r.District_Code => r.School_Code] = r.School_Name
    schooldists[r.District_Code => r.School_Code] = r.District_Name
    for grade in 1:11
        schoolgrades[(r.District_Code => r.School_Code) => grade ] = false
    end
end

for i in 1:length(uniqueids)
    schooldists[i] = schooldists[uniqueids[i]]
end

for r in eachrow(testscores)
    schoolgrades[(r.District_Code => r.School_Code) => r.Grade ] = true
end

schoolDict = Dict()
merge!(schoolDict,Dict(map(Pair,uniqueids,1:length(uniqueids))))
merge!(schoolDict,Dict(map(Pair,1:length(uniqueids),uniqueids)))

for i in 1:length(uniqueids)
    schoolnames[i] = haskey(schoolnames,schoolDict[i]) ? schoolnames[schoolDict[i]] : "Unknown";
end

smod = schoolmod(map(x -> schoolDict[x],schoolids),edlevtests.Edlev,edlevtests.Grade,edlevtests.Score,length(uniqueids),5)

Turing.setadbackend(:reversediff)
Turing.setrdcache(true)

if ispath("cache/mcmcchain.dat")
    ch = deserialize("cache/mcmcchain.dat")
else
    ch = sample(smod,NUTS(900,.8),MCMCThreads(),500,2)
end
Chains MCMC chain (500×519×2 Array{Float64, 3}):

Iterations        = 901:1:1400
Number of chains  = 2
Samples per chain = 500
Wall duration     = 1762.8 seconds
Compute duration  = 3408.96 seconds
parameters        = c4avg, c7avg, c11avg, m4avg, m7avg, msd, csd, c4[1], c4
[2], c4[3], c4[4], c4[5], c4[6], c4[7], c4[8], c4[9], c4[10], c4[11], c4[12
], c4[13], c4[14], c4[15], c4[16], c4[17], c4[18], c4[19], c4[20], c4[21], 
c4[22], c4[23], c4[24], c4[25], c4[26], c4[27], c4[28], c4[29], c4[30], c4[
31], c4[32], c4[33], c4[34], c4[35], c4[36], c4[37], c4[38], c4[39], c4[40]
, c4[41], c4[42], c4[43], c4[44], c4[45], c4[46], c4[47], c4[48], c4[49], c
4[50], c4[51], c4[52], c4[53], c4[54], c4[55], c4[56], c4[57], c4[58], c4[5
9], c4[60], c4[61], c4[62], c4[63], c4[64], c4[65], c4[66], c4[67], c4[68],
 c4[69], c4[70], c4[71], c4[72], c4[73], c4[74], c4[75], c4[76], c4[77], c4
[78], c4[79], c7[1], c7[2], c7[3], c7[4], c7[5], c7[6], c7[7], c7[8], c7[9]
, c7[10], c7[11], c7[12], c7[13], c7[14], c7[15], c7[16], c7[17], c7[18], c
7[19], c7[20], c7[21], c7[22], c7[23], c7[24], c7[25], c7[26], c7[27], c7[2
8], c7[29], c7[30], c7[31], c7[32], c7[33], c7[34], c7[35], c7[36], c7[37],
 c7[38], c7[39], c7[40], c7[41], c7[42], c7[43], c7[44], c7[45], c7[46], c7
[47], c7[48], c7[49], c7[50], c7[51], c7[52], c7[53], c7[54], c7[55], c7[56
], c7[57], c7[58], c7[59], c7[60], c7[61], c7[62], c7[63], c7[64], c7[65], 
c7[66], c7[67], c7[68], c7[69], c7[70], c7[71], c7[72], c7[73], c7[74], c7[
75], c7[76], c7[77], c7[78], c7[79], c11[1], c11[2], c11[3], c11[4], c11[5]
, c11[6], c11[7], c11[8], c11[9], c11[10], c11[11], c11[12], c11[13], c11[1
4], c11[15], c11[16], c11[17], c11[18], c11[19], c11[20], c11[21], c11[22],
 c11[23], c11[24], c11[25], c11[26], c11[27], c11[28], c11[29], c11[30], c1
1[31], c11[32], c11[33], c11[34], c11[35], c11[36], c11[37], c11[38], c11[3
9], c11[40], c11[41], c11[42], c11[43], c11[44], c11[45], c11[46], c11[47],
 c11[48], c11[49], c11[50], c11[51], c11[52], c11[53], c11[54], c11[55], c1
1[56], c11[57], c11[58], c11[59], c11[60], c11[61], c11[62], c11[63], c11[6
4], c11[65], c11[66], c11[67], c11[68], c11[69], c11[70], c11[71], c11[72],
 c11[73], c11[74], c11[75], c11[76], c11[77], c11[78], c11[79], m4[1], m4[2
], m4[3], m4[4], m4[5], m4[6], m4[7], m4[8], m4[9], m4[10], m4[11], m4[12],
 m4[13], m4[14], m4[15], m4[16], m4[17], m4[18], m4[19], m4[20], m4[21], m4
[22], m4[23], m4[24], m4[25], m4[26], m4[27], m4[28], m4[29], m4[30], m4[31
], m4[32], m4[33], m4[34], m4[35], m4[36], m4[37], m4[38], m4[39], m4[40], 
m4[41], m4[42], m4[43], m4[44], m4[45], m4[46], m4[47], m4[48], m4[49], m4[
50], m4[51], m4[52], m4[53], m4[54], m4[55], m4[56], m4[57], m4[58], m4[59]
, m4[60], m4[61], m4[62], m4[63], m4[64], m4[65], m4[66], m4[67], m4[68], m
4[69], m4[70], m4[71], m4[72], m4[73], m4[74], m4[75], m4[76], m4[77], m4[7
8], m4[79], m7[1], m7[2], m7[3], m7[4], m7[5], m7[6], m7[7], m7[8], m7[9], 
m7[10], m7[11], m7[12], m7[13], m7[14], m7[15], m7[16], m7[17], m7[18], m7[
19], m7[20], m7[21], m7[22], m7[23], m7[24], m7[25], m7[26], m7[27], m7[28]
, m7[29], m7[30], m7[31], m7[32], m7[33], m7[34], m7[35], m7[36], m7[37], m
7[38], m7[39], m7[40], m7[41], m7[42], m7[43], m7[44], m7[45], m7[46], m7[4
7], m7[48], m7[49], m7[50], m7[51], m7[52], m7[53], m7[54], m7[55], m7[56],
 m7[57], m7[58], m7[59], m7[60], m7[61], m7[62], m7[63], m7[64], m7[65], m7
[66], m7[67], m7[68], m7[69], m7[70], m7[71], m7[72], m7[73], m7[74], m7[75
], m7[76], m7[77], m7[78], m7[79], edlevmult[1], edlevmult[2], edlevmult[3]
, edlevmult[4], edlevmult[5], edlevmult[6], edlevmult[7], edlevmult[8], edl
evmult[9], edlevmult[10], edlevmult[11], edlevmult[12], edlevmult[13], edle
vmult[14], edlevmult[15], edlevmult[16], edlevmult[17], edlevmult[18], edle
vmult[19], edlevmult[20], edlevmult[21], edlevmult[22], edlevmult[23], edle
vmult[24], edlevmult[25], edlevmult[26], edlevmult[27], edlevmult[28], edle
vmult[29], edlevmult[30], edlevmult[31], edlevmult[32], edlevmult[33], edle
vmult[34], edlevmult[35], edlevmult[36], edlevmult[37], edlevmult[38], edle
vmult[39], edlevmult[40], edlevmult[41], edlevmult[42], edlevmult[43], edle
vmult[44], edlevmult[45], edlevmult[46], edlevmult[47], edlevmult[48], edle
vmult[49], edlevmult[50], edlevmult[51], edlevmult[52], edlevmult[53], edle
vmult[54], edlevmult[55], edlevmult[56], edlevmult[57], edlevmult[58], edle
vmult[59], edlevmult[60], edlevmult[61], edlevmult[62], edlevmult[63], edle
vmult[64], edlevmult[65], edlevmult[66], edlevmult[67], edlevmult[68], edle
vmult[69], edlevmult[70], edlevmult[71], edlevmult[72], edlevmult[73], edle
vmult[74], edlevmult[75], edlevmult[76], edlevmult[77], edlevmult[78], edle
vmult[79], cedlev4[1], cedlev4[2], cedlev4[3], cedlev4[4], cedlev4[5], cedl
ev7[1], cedlev7[2], cedlev7[3], cedlev7[4], cedlev7[5], cedlev11[1], cedlev
11[2], cedlev11[3], cedlev11[4], cedlev11[5], medlev4[1], medlev4[2], medle
v4[3], medlev4[4], medlev4[5], medlev7[1], medlev7[2], medlev7[3], medlev7[
4], medlev7[5], s
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, h
amiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, 
tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters        mean       std   naive_se      mcse         ess      rh
at  ⋯
      Symbol     Float64   Float64    Float64   Float64     Float64   Float
64  ⋯

       c4avg   2440.4255    5.8882     0.1862    0.6243     71.1144    1.03
24  ⋯
       c7avg   2513.8311    6.3879     0.2020    0.5647    119.5444    1.00
41  ⋯
      c11avg   2514.1965    9.5378     0.3016    0.8242    114.0035    1.00
54  ⋯
       m4avg     29.7964    2.5963     0.0821    0.1507    346.9391    1.00
55  ⋯
       m7avg     21.9920    2.6512     0.0838    0.1394    344.6919    1.00
06  ⋯
         msd     10.0978    0.9613     0.0304    0.0464    346.3260    1.00
38  ⋯
         csd     40.0965    2.6541     0.0839    0.1197    551.0185    1.00
18  ⋯
       c4[1]      1.0475   40.4098     1.2779    1.0450   1591.2607    0.99
95  ⋯
       c4[2]      0.9106   41.3182     1.3066    1.2316   1198.0163    1.00
08  ⋯
       c4[3]      0.8545   41.0887     1.2993    0.9990   2250.3768    1.00
00  ⋯
       c4[4]      0.5061   40.6234     1.2846    0.7586   2965.5892    1.00
13  ⋯
       c4[5]     -1.0709   38.3358     1.2123    0.9495   1664.0303    1.00
07  ⋯
       c4[6]     12.2531    6.7666     0.2140    0.6051     93.6259    1.03
40  ⋯
       c4[7]      4.1737    7.3038     0.2310    0.6110    126.5082    1.01
20  ⋯
       c4[8]    -10.9930    7.3825     0.2335    0.5977    125.2130    1.01
83  ⋯
       c4[9]     10.5860   11.1110     0.3514    0.5759    303.9863    1.00
51  ⋯
      c4[10]     -9.3139    6.8594     0.2169    0.5941    108.4339    1.01
93  ⋯
      ⋮            ⋮          ⋮         ⋮          ⋮          ⋮          ⋮ 
    ⋱
                                                   1 column and 490 rows om
itted

Quantiles
  parameters        2.5%       25.0%       50.0%       75.0%       97.5%
      Symbol     Float64     Float64     Float64     Float64     Float64

       c4avg   2429.3568   2436.1968   2440.1172   2444.4851   2452.6015
       c7avg   2502.0066   2509.3511   2513.5854   2518.2346   2526.6647
      c11avg   2494.7246   2508.0144   2514.3996   2520.6381   2532.9437
       m4avg     24.7613     28.1452     29.7375     31.4657     34.9334
       m7avg     16.7809     20.2899     21.9509     23.8360     27.0437
         msd      8.3538      9.4444     10.0167     10.7269     12.1284
         csd     35.3150     38.2139     39.9481     41.8059     45.5391
       c4[1]    -75.4480    -26.9471      1.1705     28.8115     78.9201
       c4[2]    -74.9908    -26.8124      0.0271     29.1100     81.6853
       c4[3]    -84.6190    -27.1160      1.6508     29.3042     77.5984
       c4[4]    -78.3777    -27.2266      0.2631     28.5506     77.4234
       c4[5]    -82.3857    -25.0379     -1.7541     24.0425     73.8144
       c4[6]     -1.0539      7.7063     12.5619     16.9552     25.5576
       c4[7]    -10.5137     -0.6167      4.5432      9.2025     18.1292
       c4[8]    -25.8019    -15.9027    -10.6718     -5.9972      2.7659
       c4[9]    -11.1233      3.2125     11.0321     18.5573     31.4475
      c4[10]    -22.7640    -14.1670     -8.9317     -4.5674      3.7168
      ⋮            ⋮           ⋮           ⋮           ⋮           ⋮
                                                          490 rows omitted

Now that we have fit our model, we have hundreds of samples of hundreds of parameters. Let's take a look at their distribution

for sch in 1:length(uniqueids)
    dist = schooldists[sch]
    p = plot(; linewidth=3, title = "$dist : $(schoolnames[sch])", xlim=(2.5,12),ylim=(2300,2700),legend=false)
    c4avg = mean(ch[:,Symbol("c4avg"),:])
    m4avg = mean(ch[:,Symbol("m4avg"),:])
    c7avg = mean(ch[:,Symbol("c7avg"),:])
    m7avg = mean(ch[:,Symbol("m7avg"),:])
    c11avg = mean(ch[:,Symbol("c11avg"),:])
    edlevcolors = colorschemes[:tol_light] # a RG colorblind friendly pallette
    has4 = schoolgrades[schoolDict[sch] => 4] 
    has7 = schoolgrades[schoolDict[sch] => 7] 
    has11 = schoolgrades[schoolDict[sch] => 11] 
    for ed in 1:5

        c4 = mean(ch[:,Symbol("c4[$sch]"),:] + ch[:,Symbol("cedlev4[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c4avg
        m4 = mean(ch[:,Symbol("m4[$sch]"),:] + ch[:,Symbol("medlev4[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + m4avg
        c7 = mean(ch[:,Symbol("c7[$sch]"),:] + ch[:,Symbol("cedlev7[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c7avg
        m7 = mean(ch[:,Symbol("m7[$sch]"),:] + ch[:,Symbol("medlev7[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + m7avg 
        c11 = mean(ch[:,Symbol("c11[$sch]"),:] + ch[:,Symbol("cedlev11[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c11avg
        
        al = has4 ? 1.0 : .1
        p = plot!([3,5], [c4-m4,c4+m4]; linewidth=6, color=edlevcolors[ed], xlim=(2.5,12),ylim=(2300,2800),legend=false, alpha=al)
        al = has7 ? 1.0 : .1
        p = plot!([6,8], [c7-m7,c7+m7]; linewidth=6, color=edlevcolors[ed],alpha=al)
        al = has11 ? 1.0 : .1
        p = scatter!([11], [c11]; markersize=6,alpha=al,color=edlevcolors[ed])
        for i in 1:10
            edmult =  ch[i,Symbol("edlevmult[$sch]"),1]
            j = rand(1:length(ch))
            c = ch[j,Symbol("c4[$sch]"),1] + ch[j,Symbol("cedlev4[$ed]"),1] * edmult + c4avg
            m = ch[j,Symbol("m4[$sch]"),1] + ch[j,Symbol("medlev4[$ed]"),1] * edmult + m4avg
            p = plot!([3,5],[c - m, c + m],label=false,alpha=.2,color=edlevcolors[ed])

            c =  ch[j,Symbol("c7[$sch]"),1] + ch[j,Symbol("cedlev7[$ed]"),1] * edmult + c7avg
            m = ch[j,Symbol("m7[$sch]"),1] + ch[j,Symbol("medlev7[$ed]"),1] * edmult + m7avg
            p = plot!([6,8],[c - m, c + m],label=false,alpha=.2,color=edlevcolors[ed])
            c11 = ch[j,Symbol("c11[$sch]"),1] + ch[j,Symbol("cedlev11[$ed]"),1] * edmult + c11avg
            p = scatter!([randn()*.2 + 11],[c11],label=false,alpha=.2,color=edlevcolors[ed])
        end
    end
    display(p)
end